Last updated: 2020-01-15 16:44:06
# Metadata ----------------------------------------------------------------
meta <- METAdata %>%
mutate(sample_GUID = gsub("_C[0-9]$", "", GUID))
# Number of GUIDs with metadata entries
length(unique(meta$GUID))
## [1] 6704
# Number of sample_GUIDs with metadata entries
length(unique(meta$sample_GUID))
## [1] 5900
# Number of sample_GUIDs with metadata entries in each category and
# Proportion of sample_GUIDs with metadata entries in each category
meta %>%
select(sample_GUID, Category) %>%
unique() %>%
group_by(Category) %>%
summarise(count = n(),
proportion = n() / nrow(.))
## # A tibble: 3 x 3
## Category count proportion
## <chr> <int> <dbl>
## 1 animal 2253 0.382
## 2 human 2068 0.351
## 3 other 1579 0.268
# Antibiograms ------------------------------------------------------------
atb <- ATBdata %>%
mutate(sample_GUID = gsub("_C[0-9]$", "", UNIQUE_SPARK_ID))
# Number of GUIDs with antibiogram profiles
length(unique(atb$UNIQUE_SPARK_ID))
## [1] 3740
# Number of sample_GUIDs with antibiogram profiles
length(unique(atb$sample_GUID))
## [1] 2936
# Number of sample_GUIDs with antibiograms in each category and
# Proportion of sample_GUIDs with antibiogram profiles in each category
meta %>%
filter(sample_GUID %in% atb$sample_GUID) %>%
select(sample_GUID, Category) %>%
unique() %>%
group_by(Category) %>%
summarise(count = n(),
proportion = n() / nrow(.))
## # A tibble: 3 x 3
## Category count proportion
## <chr> <int> <dbl>
## 1 animal 918 0.313
## 2 human 1430 0.487
## 3 other 588 0.200
# Number of sample_GUIDs with antibiogram profiles in each category and
# Proportion of sample_GUIDs with antibiogram profiles in each category
# relative to the total number of sample_GUIDs with metadata entries
meta %>%
filter(sample_GUID %in% atb$sample_GUID) %>%
select(sample_GUID, Category) %>%
unique() %>%
group_by(Category) %>%
summarise(n_atb = n()) %>%
merge(meta %>%
select(sample_GUID, Category) %>%
unique() %>%
group_by(Category) %>%
summarise(n_meta = n())) %>%
mutate(percentage = n_atb / n_meta)
## Category n_atb n_meta percentage
## 1 animal 918 2253 0.4074567
## 2 human 1430 2068 0.6914894
## 3 other 588 1579 0.3723876
# Distribution of numbers of antibiotics tested (that use MIC values)
atb %>%
filter(used_MIC == "yes",
Interpretation != "ND") %>%
select(sample_GUID, Antibiotic_name) %>%
unique() %>%
group_by(sample_GUID) %>%
summarise(count = n()) %$%
count %>%
summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 26.00 27.00 24.96 27.00 28.00
# Find most common resistance profiles (by GUIDs)
tmp <- atb %>%
filter(used_MIC == "yes",
Interpretation != "ND")
antibiotics <- as.character(unique(tmp$Antibiotic_name))
profiles <- tmp %>%
reshape2::dcast(UNIQUE_SPARK_ID + sample_GUID ~ Antibiotic_name,
value.var = "Interpretation") %>%
mutate_at(vars(one_of(antibiotics)), ~ case_when(.=="R" ~ 1, T ~ 0)) %>%
mutate(profile = apply(.[antibiotics], 1, function(x)
paste0(x, collapse = "")))
top_profiles <- profiles %>%
group_by(profile) %>%
summarise(count = n()) %>%
arrange(desc(count))
top_profiles
## # A tibble: 413 x 2
## profile count
## <chr> <int>
## 1 0010000000000000000000000000 1897
## 2 0010000000000010000000000000 368
## 3 0000000000000000000000000000 181
## 4 0010000000000000000100000000 154
## 5 0010000000000000000001000000 105
## 6 0110100000000000000000000000 97
## 7 0110101000100000000000000000 34
## 8 0010000000000010000001000000 30
## 9 0000000000000010000000000000 28
## 10 0010000000000010000100000000 21
## # … with 403 more rows
# Most common antibiotic profiles (note that the 3 most common profile is a
# resistance to nothing)
lapply(1:10, function(x) {
profiles %>%
filter(profile == top_profiles$profile[x]) %>%
.[antibiotics] %>%
colSums() %>%
.[. != 0] %>%
names()
})
## [[1]]
## [1] "Ampicillin"
##
## [[2]]
## [1] "Ampicillin" "Fosfomycin_G6P"
##
## [[3]]
## character(0)
##
## [[4]]
## [1] "Ampicillin" "Nitrofurantoin"
##
## [[5]]
## [1] "Ampicillin" "Piperacillin"
##
## [[6]]
## [1] "Amoxicillin_clavulanic_acid" "Ampicillin"
## [3] "Cefalexin"
##
## [[7]]
## [1] "Amoxicillin_clavulanic_acid" "Ampicillin"
## [3] "Cefalexin" "Cefixime"
## [5] "Cefuroxime"
##
## [[8]]
## [1] "Ampicillin" "Fosfomycin_G6P" "Piperacillin"
##
## [[9]]
## [1] "Fosfomycin_G6P"
##
## [[10]]
## [1] "Ampicillin" "Fosfomycin_G6P" "Nitrofurantoin"
# Sequences ---------------------------------------------------------------
kleb <- KLEBdata %>%
mutate(sample_GUID = gsub("_C[0-9]$", "", GUID))
# Number of GUIDs with seqences
length(unique(kleb$GUID))
## [1] 3510
# Number of sample_GUIDs with seqences
length(unique(kleb$sample_GUID))
## [1] 2797
# Number of sample_GUIDs with sequences in each category and
# Proportion of sample_GUIDs with sequences in each category
meta %>%
select(sample_GUID, Category) %>%
unique() %>%
filter(sample_GUID %in% kleb$sample_GUID) %>%
group_by(Category) %>%
summarise(count = n(),
proportion = n() / nrow(.))
## # A tibble: 3 x 3
## Category count proportion
## <chr> <int> <dbl>
## 1 animal 877 0.314
## 2 human 1346 0.481
## 3 other 574 0.205
# Number of sample_GUIDs with sequences from each pathogen and
# Proportion of sample_GUIDs with sequences from each pathogen
kleb %>%
select(sample_GUID, species) %>%
unique() %>%
group_by(species) %>%
summarise(count = n(),
proportion = n() / nrow(.)) %>%
arrange(desc(proportion))
## # A tibble: 17 x 3
## species count proportion
## <chr> <int> <dbl>
## 1 Klebsiella pneumoniae 1533 0.475
## 2 Klebsiella michiganensis 281 0.0871
## 3 Klebsiella variicola subsp. variicola 265 0.0821
## 4 Klebsiella oxytoca 240 0.0744
## 5 Klebsiella (Raoultella) ornithinolytica 234 0.0725
## 6 Klebsiella grimontii 176 0.0546
## 7 Klebsiella aerogenes 167 0.0518
## 8 Klebsiella (Raoultella) planticola 117 0.0363
## 9 Klebsiella quasipneumoniae subsp. quasipneumoniae 94 0.0291
## 10 Klebsiella quasipneumoniae subsp. similipneumoniae 42 0.0130
## 11 Klebsiella pasteurii 26 0.00806
## 12 Klebsiella (Raoultella) terrigena 22 0.00682
## 13 Klebsiella spallanzanii 10 0.00310
## 14 Klebsiella huaxiensis 8 0.00248
## 15 Raoultella quasiterrigena 6 0.00186
## 16 Klebsiella quasivariicola 4 0.00124
## 17 unknown 1 0.000310
# Start of modelling section ----------------------------------------------
antibiotic_class <- atb %>%
filter(used_MIC == "yes") %$%
Classification %>%
unique()
antibiotic_class
## [1] "Aminoglycoside" "Penicillin Combination"
## [3] "Penicillin" "Monobactam"
## [5] "Cephalosporin" "Fluoroquinolone"
## [7] "Colistin" "Carbapenem"
## [9] "Fosfomycin" "Quinolone"
## [11] "Nitrofurantoin" "Penicillin (Penams)"
## [13] "Tetracycline" "Trimethoprim"
## [15] "Trimethoprim/Sulfamethoxazole"
resistances <- lapply(antibiotic_class, function(x) {
id <- atb %>%
filter(Classification == x,
Interpretation == "R") %$%
UNIQUE_SPARK_ID %>%
unique()
data <- meta %>%
mutate(response = case_when(GUID %in% id ~ "resistant",
T ~ "not resistant"))
data %>%
group_by(Category, response) %>%
summarise(Count = n()) %>%
complete(Category = unique(Category),
response = c("resistant", "not resistant"),
fill = list(Count = 0)) %>%
ungroup() %>%
mutate(class = x)
}) %>%
do.call(rbind.data.frame, .)
# Put plots in this order
sort_order <- resistances %>%
filter(response == "resistant") %>%
group_by(class) %>%
summarise(total = sum(Count)) %>%
arrange(desc(total)) %$%
class
resistances %<>%
mutate(class = factor(class, levels = sort_order))
# Plot the number of GUIDs resistant to (red) and not resistant to (grey) each
# antibiotic
resistances %>%
ggplot() + theme_minimal() + facet_wrap(~class) +
geom_bar(aes(x = Category, y = Count, fill = response),
colour = "black", stat = "identity") +
scale_fill_manual(values = c("grey", "red"))
# Plot the resistance to each antibiotic as a percentage (of GUIDs)
resistances %>%
group_by(Category, class) %>%
summarise(percentage = Count[response == "resistant"] / sum(Count)) %>%
ggplot() + theme_minimal() + facet_wrap(~class) +
geom_bar(aes(x = Category, y = percentage),
colour = "black", fill = "red", stat = "identity") +
labs(title = "Percentage of resistant samples")
# List values used in the above plot (percentage resistant) categorised
# as human, animal, or other
resistances %>%
group_by(Category, class) %>%
summarise(percentage = Count[response == "resistant"] / sum(Count)) %>%
data.frame()
## Category class percentage
## 1 animal Penicillin 0.4385048232
## 2 animal Penicillin Combination 0.0418006431
## 3 animal Cephalosporin 0.0365755627
## 4 animal Fosfomycin 0.0643086817
## 5 animal Penicillin (Penams) 0.0293408360
## 6 animal Fluoroquinolone 0.0164790997
## 7 animal Trimethoprim/Sulfamethoxazole 0.0136655949
## 8 animal Aminoglycoside 0.0044212219
## 9 animal Nitrofurantoin 0.0458199357
## 10 animal Carbapenem 0.0000000000
## 11 animal Trimethoprim 0.0152733119
## 12 animal Tetracycline 0.0036173633
## 13 animal Monobactam 0.0064308682
## 14 animal Colistin 0.0016077170
## 15 animal Quinolone 0.0000000000
## 16 human Penicillin 0.6233489561
## 17 human Penicillin Combination 0.2275244994
## 18 human Cephalosporin 0.2202812101
## 19 human Fosfomycin 0.1261184491
## 20 human Penicillin (Penams) 0.1887515978
## 21 human Fluoroquinolone 0.2011077972
## 22 human Trimethoprim/Sulfamethoxazole 0.1708564124
## 23 human Aminoglycoside 0.1640391990
## 24 human Nitrofurantoin 0.0639113762
## 25 human Carbapenem 0.0920323818
## 26 human Trimethoprim 0.0703025138
## 27 human Tetracycline 0.0630592245
## 28 human Monobactam 0.0519812527
## 29 human Colistin 0.0191734129
## 30 human Quinolone 0.0000000000
## 31 other Penicillin 0.4456928839
## 32 other Penicillin Combination 0.0518994114
## 33 other Cephalosporin 0.0444087747
## 34 other Fosfomycin 0.0882825040
## 35 other Penicillin (Penams) 0.0406634564
## 36 other Fluoroquinolone 0.0107009096
## 37 other Trimethoprim/Sulfamethoxazole 0.0069555912
## 38 other Aminoglycoside 0.0048154093
## 39 other Nitrofurantoin 0.0347779561
## 40 other Carbapenem 0.0032102729
## 41 other Trimethoprim 0.0074906367
## 42 other Tetracycline 0.0037453184
## 43 other Monobactam 0.0042803638
## 44 other Colistin 0.0005350455
## 45 other Quinolone 0.0000000000
# List percentage resistant (combining all human, animal, and other samples)
resistances %>%
group_by(response, class) %>%
summarise(total = sum(Count)) %>%
group_by(class) %>%
summarise(percentage = total[response == "resistant"] / sum(total)) %>%
data.frame()
## class percentage
## 1 Penicillin 0.505220764
## 2 Penicillin Combination 0.109636038
## 3 Cephalosporin 0.103072792
## 4 Fosfomycin 0.092631265
## 5 Penicillin (Penams) 0.088305489
## 6 Fluoroquinolone 0.079504773
## 7 Trimethoprim/Sulfamethoxazole 0.066825776
## 8 Aminoglycoside 0.060411695
## 9 Nitrofurantoin 0.049075179
## 10 Carbapenem 0.033114558
## 11 Trimethoprim 0.032368735
## 12 Tetracycline 0.024463007
## 13 Monobactam 0.021778043
## 14 Colistin 0.007458234
## 15 Quinolone 0.000000000
# Penicillin --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=243.08
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## Wild_animal + PATHOGEN
##
##
## Step: AIC=243.08
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## PATHOGEN
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 210.8 238.8
## - Companion_animal 1 205.8 241.8
## - Livestock 1 207.1 243.1
## <none> 205.1 243.1
## - PATHOGEN 11 3203.0 3219.0
##
## Step: AIC=238.79
## response ~ Livestock + Companion_animal + PATHOGEN
##
## Df Deviance AIC
## - Companion_animal 1 211.0 237.0
## - Livestock 1 211.5 237.5
## <none> 210.8 238.8
## - PATHOGEN 11 3398.9 3404.9
##
## Step: AIC=237.03
## response ~ Livestock + PATHOGEN
##
## Df Deviance AIC
## - Livestock 1 212.3 236.3
## <none> 211.0 237.0
## - PATHOGEN 11 3410.2 3414.2
##
## Step: AIC=236.33
## response ~ PATHOGEN
##
## Df Deviance AIC
## <none> 212.3 236.3
## - PATHOGEN 11 3411.4 3413.4
title <- gsub("response", "Penicillin", deparse(best_model$formula))
apen <- plotROC(df, best_model, title)
apen
# --- Human
df <- get_data("human", "Penicillin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=612.29
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 560.57 600.57
## - SEX 1 554.85 610.85
## - AGE 1 556.15 612.15
## <none> 554.29 612.29
## - Clinical 2 561.14 615.14
## - SPECIFIC_GROUP 5 1147.93 1195.93
## - PATHOGEN 10 2784.35 2822.35
##
## Step: AIC=600.57
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE
##
## Df Deviance AIC
## - SEX 1 561.08 599.08
## <none> 560.57 600.57
## - AGE 1 564.20 602.20
## - Clinical 2 567.35 603.35
## - SPECIFIC_GROUP 5 1174.75 1204.75
## - PATHOGEN 10 2793.18 2813.18
##
## Step: AIC=599.08
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE
##
## Df Deviance AIC
## <none> 561.08 599.08
## - AGE 1 564.59 600.59
## - Clinical 2 567.90 601.90
## - SPECIFIC_GROUP 5 1197.08 1225.08
## - PATHOGEN 10 2793.19 2811.19
best_model$formula
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE
# Adding ward to this as a random effect results in an error
title <- gsub("response", "Penicillin", deparse(best_model$formula))
hpen <- plotROC(df, best_model, title)
hpen
# --- Other
df <- get_data("other", "Penicillin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1906.72
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - Plant_post_harvest 1 1873.2 1905.2
## <none> 1872.7 1906.7
## - Plant_pre_harvest 1 1876.0 1908.0
## - Other 1 1876.9 1908.9
## - Soil 1 1896.7 1928.7
## - Water 1 1901.0 1933.0
## - Hospital 3 1916.8 1944.8
## - ORIGIN 8 1934.2 1952.2
##
## Step: AIC=1905.18
## response ~ Plant_pre_harvest + Water + Soil + Other + Hospital +
## ORIGIN
##
## Df Deviance AIC
## <none> 1873.2 1905.2
## - Plant_pre_harvest 1 1878.5 1908.5
## - Other 1 1879.7 1909.7
## - Soil 1 1899.3 1929.3
## - Hospital 3 1919.1 1945.1
## - ORIGIN 8 1934.8 1950.8
## - Water 1 1974.1 2004.1
title <- paste(deparse(best_model$formula), collapse = "") %>%
gsub("response", "Penicillin", .) %>%
gsub(" ", "", .)
open <- plotROC(df, best_model, title)
open
# Penicillin Combination --------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=856.54
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - Livestock 1 842.36 856.36
## <none> 840.54 856.54
## - Companion_animal 1 843.10 857.10
## - ASSOCIATED_GROUP 5 858.48 864.48
##
## Step: AIC=856.36
## response ~ ASSOCIATED_GROUP + Companion_animal
##
## Df Deviance AIC
## <none> 842.36 856.36
## - Companion_animal 1 844.88 856.88
## - ASSOCIATED_GROUP 5 859.68 863.68
title <- gsub("response", "PenC", deparse(best_model$formula))
apenc <- plotROC(df, best_model, title)
apenc
# --- Human
df <- get_data("human", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1718.27
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 1660.6 1716.6
## <none> 1660.3 1718.3
## - AGE_GROUP 9 1679.2 1719.2
## - Clinical 2 1677.1 1731.1
## - SEX 1 1676.0 1732.0
## - SPECIFIC_GROUP 5 1716.6 1764.6
## - PATHOGEN 10 2346.5 2384.5
##
## Step: AIC=1716.59
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
##
## Df Deviance AIC
## <none> 1660.6 1716.6
## - AGE_GROUP 9 1689.2 1727.2
## - Clinical 2 1677.4 1729.4
## - SEX 1 1676.3 1730.3
## - SPECIFIC_GROUP 5 1717.6 1763.6
## - PATHOGEN 10 2346.6 2382.6
title <- gsub("response", "PenC", deparse(best_model$formula))
hpenc <- plotROC(df, best_model, title)
hpenc
# --- Other
df <- get_data("other", "Penicillin Combination")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=712.14
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - Plant_pre_harvest 1 678.22 710.22
## - Other 1 678.47 710.47
## - Plant_post_harvest 1 678.50 710.50
## - ORIGIN 8 692.95 710.95
## <none> 678.14 712.14
## - Water 1 681.05 713.05
## - Soil 1 684.72 716.72
## - Hospital 3 691.53 719.53
##
## Step: AIC=710.22
## response ~ Plant_post_harvest + Water + Soil + Other + Hospital +
## ORIGIN
##
## Df Deviance AIC
## - Other 1 678.51 708.51
## - Plant_post_harvest 1 678.53 708.53
## - ORIGIN 8 693.26 709.26
## <none> 678.22 710.22
## - Soil 1 685.07 715.07
## - Hospital 3 694.55 720.55
## - Water 1 694.41 724.41
##
## Step: AIC=708.51
## response ~ Plant_post_harvest + Water + Soil + Hospital + ORIGIN
##
## Df Deviance AIC
## - Plant_post_harvest 1 678.72 706.72
## - ORIGIN 8 693.36 707.36
## <none> 678.51 708.51
## - Hospital 3 697.24 721.24
## - Soil 1 695.02 723.02
## - Water 1 726.13 754.13
##
## Step: AIC=706.72
## response ~ Water + Soil + Hospital + ORIGIN
##
## Df Deviance AIC
## <none> 678.72 706.72
## - ORIGIN 8 695.81 707.81
## - Hospital 3 697.51 719.51
## - Soil 1 695.04 721.04
## - Water 1 727.18 753.18
title <- gsub("response", "PenC", deparse(best_model$formula))
openc <- plotROC(df, best_model, title)
openc
# Cephalosporin -----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% filter(ASSOCIATED_GROUP != "cheese")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
binomial(link = "logit"), df)
AIC(model1, model2) # model1 is better
## df AIC
## model1 5 760.8327
## model2 3 784.6170
title <- gsub("response", "Ceph", deparse(best_model$formula))
acep <- plotROC(df, model1, title)
acep
# --- Human
df <- get_data("human", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1629.3
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 1571.3 1627.3
## <none> 1571.3 1629.3
## - AGE_GROUP 9 1589.8 1629.8
## - Clinical 2 1586.9 1640.9
## - SEX 1 1597.4 1653.4
## - SPECIFIC_GROUP 5 1638.0 1686.0
## - PATHOGEN 10 2276.0 2314.0
##
## Step: AIC=1627.3
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
##
## Df Deviance AIC
## <none> 1571.3 1627.3
## - Clinical 2 1586.9 1638.9
## - AGE_GROUP 9 1606.0 1644.0
## - SEX 1 1597.5 1651.5
## - SPECIFIC_GROUP 5 1638.2 1684.2
## - PATHOGEN 10 2276.7 2312.7
title <- gsub("response", "Ceph", deparse(best_model$formula))
hcep <- plotROC(df, best_model, title)
hcep
# --- Other
df <- get_data("other", "Cephalosporin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=638.84
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - Water 1 605.31 637.31
## - Plant_post_harvest 1 606.22 638.22
## - Plant_pre_harvest 1 606.23 638.23
## - Other 1 606.30 638.30
## <none> 604.84 638.84
## - ORIGIN 8 621.59 639.59
## - Soil 1 609.44 641.44
## - Hospital 3 616.32 644.32
##
## Step: AIC=637.31
## response ~ Plant_pre_harvest + Plant_post_harvest + Soil + Other +
## Hospital + ORIGIN
##
## Df Deviance AIC
## <none> 605.31 637.31
## - ORIGIN 8 622.44 638.44
## - Soil 1 609.66 639.66
## - Plant_post_harvest 1 611.50 641.50
## - Hospital 3 617.59 643.59
## - Plant_pre_harvest 1 618.41 648.41
## - Other 1 634.90 664.90
title <- paste(deparse(best_model$formula), collapse = "") %>%
gsub("response", "Ceph", .) %>%
gsub(" ", "", .)
ocep <- plotROC(df, best_model, title)
ocep
# Fosfomycin --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=838.34
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## Wild_animal + PATHOGEN
##
##
## Step: AIC=838.34
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## PATHOGEN
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 808.38 836.38
## <none> 800.34 838.34
## - Livestock 1 803.22 839.22
## - Companion_animal 1 803.67 839.67
## - PATHOGEN 11 1163.80 1179.80
##
## Step: AIC=836.38
## response ~ Livestock + Companion_animal + PATHOGEN
##
## Df Deviance AIC
## - Livestock 1 809.73 835.73
## - Companion_animal 1 810.06 836.06
## <none> 808.38 836.38
## - PATHOGEN 11 1179.74 1185.74
##
## Step: AIC=835.73
## response ~ Companion_animal + PATHOGEN
##
## Df Deviance AIC
## - Companion_animal 1 810.36 834.36
## <none> 809.73 835.73
## - PATHOGEN 11 1185.24 1189.24
##
## Step: AIC=834.36
## response ~ PATHOGEN
##
## Df Deviance AIC
## <none> 810.36 834.36
## - PATHOGEN 11 1187.58 1189.58
title <- gsub("response", "Fos", deparse(best_model$formula))
afos <- plotROC(df, best_model, title)
afos
# --- Human
df <- get_data("human", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1513.17
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - SPECIFIC_GROUP 5 1458.6 1506.6
## - AGE_GROUP 9 1466.8 1506.8
## - SEX 1 1455.5 1511.5
## - AGE 1 1456.2 1512.2
## <none> 1455.2 1513.2
## - Clinical 2 1460.1 1514.1
## - PATHOGEN 10 1731.8 1769.8
##
## Step: AIC=1506.63
## response ~ Clinical + PATHOGEN + SEX + AGE + AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 1470.7 1500.7
## - SEX 1 1459.2 1505.2
## - AGE 1 1459.4 1505.4
## <none> 1458.6 1506.6
## - Clinical 2 1463.6 1507.6
## - PATHOGEN 10 1739.3 1767.3
##
## Step: AIC=1500.66
## response ~ Clinical + PATHOGEN + SEX + AGE
##
## Df Deviance AIC
## - SEX 1 1471.0 1499.0
## <none> 1470.7 1500.7
## - Clinical 2 1475.2 1501.2
## - AGE 1 1475.0 1503.0
## - PATHOGEN 10 1754.6 1764.6
##
## Step: AIC=1499.05
## response ~ Clinical + PATHOGEN + AGE
##
## Df Deviance AIC
## <none> 1471.0 1499.0
## - Clinical 2 1475.4 1499.4
## - AGE 1 1475.2 1501.2
## - PATHOGEN 10 1755.4 1763.4
title <- gsub("response", "Fos", deparse(best_model$formula))
hfos <- plotROC(df, best_model, title)
hfos
# --- Other
df <- get_data("other", "Fosfomycin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1069.9
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - Other 1 1037.9 1069.9
## <none> 1035.9 1069.9
## - ORIGIN 8 1055.2 1073.2
## - Plant_post_harvest 1 1041.9 1073.9
## - Hospital 3 1046.3 1074.3
## - Plant_pre_harvest 1 1043.2 1075.2
## - Soil 1 1043.3 1075.3
## - Water 1 1049.4 1081.4
##
## Step: AIC=1069.86
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Hospital + ORIGIN
##
## Df Deviance AIC
## <none> 1037.9 1069.9
## - Hospital 3 1046.3 1072.3
## - ORIGIN 8 1057.3 1073.3
## - Plant_post_harvest 1 1043.3 1073.3
## - Soil 1 1043.7 1073.7
## - Plant_pre_harvest 1 1046.5 1076.5
## - Water 1 1080.9 1110.9
title <- paste(deparse(best_model$formula), collapse = "") %>%
gsub("response", "Fos", .) %>%
gsub(" ", "", .)
ofos <- plotROC(df, best_model, title)
ofos
# Penicillin (Penams) -----------------------------------------------------
# --- Animal
df <- get_data("animal", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=543.13
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## Wild_animal + PATHOGEN
##
##
## Step: AIC=543.13
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal +
## PATHOGEN
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 510.61 538.61
## - Livestock 1 505.15 541.15
## <none> 505.13 543.13
## - Companion_animal 1 507.54 543.54
## - PATHOGEN 11 645.58 661.58
##
## Step: AIC=538.61
## response ~ Livestock + Companion_animal + PATHOGEN
##
## Df Deviance AIC
## - Livestock 1 510.67 536.67
## <none> 510.61 538.61
## - Companion_animal 1 521.15 547.15
## - PATHOGEN 11 648.86 654.86
##
## Step: AIC=536.67
## response ~ Companion_animal + PATHOGEN
##
## Df Deviance AIC
## <none> 510.67 536.67
## - Companion_animal 1 525.99 549.99
## - PATHOGEN 11 648.98 652.98
title <- gsub("response", "Pen (Pen)", deparse(best_model$formula))
app <- plotROC(df, best_model, title)
app
# --- Human
df <- get_data("human", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1738.59
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 1690.8 1730.8
## - AGE 1 1680.6 1736.6
## <none> 1680.6 1738.6
## - SEX 1 1686.1 1742.1
## - Clinical 2 1705.7 1759.7
## - SPECIFIC_GROUP 5 1753.9 1801.9
## - PATHOGEN 10 2162.9 2200.9
##
## Step: AIC=1730.81
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE
##
## Df Deviance AIC
## <none> 1690.8 1730.8
## - AGE 1 1694.8 1732.8
## - SEX 1 1695.8 1733.8
## - Clinical 2 1717.3 1753.3
## - SPECIFIC_GROUP 5 1766.8 1796.8
## - PATHOGEN 10 2173.4 2193.4
title <- gsub("response", "Pen (Pen)", deparse(best_model$formula))
hpp <- plotROC(df, best_model, title)
hpp
# --- Other
df <- get_data("other", "Penicillin (Penams)")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=606.75
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - ORIGIN 8 582.19 600.19
## - Other 1 572.77 604.77
## - Plant_pre_harvest 1 572.78 604.78
## - Plant_post_harvest 1 574.05 606.05
## - Soil 1 574.17 606.17
## <none> 572.75 606.75
## - Water 1 575.94 607.94
## - Hospital 3 592.55 620.55
##
## Step: AIC=600.19
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital
##
## Df Deviance AIC
## - Other 1 582.20 598.20
## - Plant_pre_harvest 1 582.22 598.22
## - Plant_post_harvest 1 583.71 599.71
## - Soil 1 583.86 599.86
## <none> 582.19 600.19
## - Water 1 584.82 600.82
## - Hospital 3 604.15 616.15
##
## Step: AIC=598.2
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Hospital
##
## Df Deviance AIC
## - Plant_pre_harvest 1 582.37 596.37
## - Soil 1 584.10 598.10
## <none> 582.20 598.20
## - Plant_post_harvest 1 586.92 600.92
## - Water 1 604.06 618.06
## - Hospital 3 621.63 631.63
##
## Step: AIC=596.37
## response ~ Plant_post_harvest + Water + Soil + Hospital
##
## Df Deviance AIC
## <none> 582.37 596.37
## - Soil 1 585.42 597.42
## - Plant_post_harvest 1 588.12 600.12
## - Water 1 607.22 619.22
## - Hospital 3 622.29 630.29
title <- paste(deparse(best_model$formula), collapse = "") %>%
gsub("response", "Pen (Pen)", .) %>%
gsub(" ", "", .)
opp <- plotROC(df, best_model, title)
opp
# Fluoroquinolone ---------------------------------------------------------
# --- Animal
df <- get_data("animal", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=419.79
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 405.91 411.91
## - Livestock 1 404.99 418.99
## - Companion_animal 1 405.03 419.03
## <none> 403.79 419.79
##
## Step: AIC=411.91
## response ~ Livestock + Companion_animal
##
## Df Deviance AIC
## <none> 405.91 411.91
## - Companion_animal 1 415.23 419.23
## - Livestock 1 417.19 421.19
title <- gsub("response", "Flu", deparse(best_model$formula))
aflu <- plotROC(df, best_model, title)
aflu
# --- Human
df <- get_data("human", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1605.05
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 1547.0 1603.0
## - AGE_GROUP 9 1563.3 1603.3
## <none> 1547.0 1605.0
## - SEX 1 1560.0 1616.0
## - Clinical 2 1562.2 1616.2
## - SPECIFIC_GROUP 5 1628.3 1676.3
## - PATHOGEN 10 2129.1 2167.1
##
## Step: AIC=1603.05
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
##
## Df Deviance AIC
## <none> 1547.0 1603.0
## - AGE_GROUP 9 1574.2 1612.2
## - SEX 1 1560.0 1614.0
## - Clinical 2 1562.2 1614.2
## - SPECIFIC_GROUP 5 1628.7 1674.7
## - PATHOGEN 10 2129.3 2165.3
title <- gsub("response", "Flu", deparse(best_model$formula))
hflu <- plotROC(df, best_model, title)
hflu
# --- Other
df <- get_data("other", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1849 20
# Trimethoprim/Sulfamethoxazole -------------------------------------------
# --- Animal
df <- get_data("animal", "Trimethoprim/Sulfamethoxazole")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=356.51
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 343.78 349.78
## - Livestock 1 341.35 355.35
## - Companion_animal 1 341.43 355.43
## <none> 340.51 356.51
##
## Step: AIC=349.78
## response ~ Livestock + Companion_animal
##
## Df Deviance AIC
## <none> 343.78 349.78
## - Companion_animal 1 357.03 361.03
## - Livestock 1 358.24 362.24
title <- gsub("response", "Tri/Sul", deparse(best_model$formula))
ats <- plotROC(df, best_model, title)
ats
# --- Human
df <- get_data("human", "Trimethoprim/Sulfamethoxazole")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=2008.93
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 1971.2 2007.2
## <none> 1970.9 2008.9
## - Clinical 2 1976.7 2010.7
## - SEX 1 1977.8 2013.8
## - AGE_GROUP 9 1994.1 2014.1
## - SPECIFIC_GROUP 5 2061.7 2089.7
##
## Step: AIC=2007.22
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE_GROUP
##
## Df Deviance AIC
## <none> 1971.2 2007.2
## - Clinical 2 1976.9 2008.9
## - SEX 1 1978.1 2012.1
## - AGE_GROUP 9 2005.0 2023.0
## - SPECIFIC_GROUP 5 2061.7 2087.7
title <- gsub("response", "Tri/Sul", deparse(best_model$formula))
hts <- plotROC(df, best_model, title)
hts
# --- Other
df <- get_data("other", "Fluoroquinolone")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1849 20
# Aminoglycoside ----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Aminoglycoside")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
## df AIC
## model1 6 144.5713
## model2 3 130.3123
title <- gsub("response", "Amin", deparse(best_model$formula))
aam <- plotROC(df, model2, title)
aam
# --- Human
df <- get_data("human", "Aminoglycoside")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1914.63
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 1876.6 1912.6
## <none> 1876.6 1914.6
## - Clinical 2 1881.1 1915.1
## - AGE_GROUP 9 1895.5 1915.5
## - SEX 1 1892.7 1928.7
## - SPECIFIC_GROUP 5 1984.2 2012.2
##
## Step: AIC=1912.63
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE_GROUP
##
## Df Deviance AIC
## <none> 1876.6 1912.6
## - Clinical 2 1881.1 1913.1
## - SEX 1 1892.7 1926.7
## - AGE_GROUP 9 1929.2 1947.2
## - SPECIFIC_GROUP 5 1984.5 2010.5
title <- gsub("response", "Amin", deparse(best_model$formula))
ham <- plotROC(df, best_model, title)
ham
# --- Other
df <- get_data("other", "Aminoglycoside")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1860 9
# Nitrofurantoin ----------------------------------------------------------
# --- Animal
df <- get_data("animal", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=888.09
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - Companion_animal 1 873.56 887.56
## <none> 872.09 888.09
## - Livestock 1 874.72 888.72
## - ASSOCIATED_GROUP 5 899.58 905.58
##
## Step: AIC=887.56
## response ~ ASSOCIATED_GROUP + Livestock
##
## Df Deviance AIC
## <none> 873.56 887.56
## - Livestock 1 879.72 891.72
## - ASSOCIATED_GROUP 5 906.05 910.05
title <- gsub("response", "Nitr", deparse(best_model$formula))
anit <- plotROC(df, best_model, title)
anit
# --- Human
df <- get_data("human", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1130.35
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 1097.8 1117.8
## - Clinical 2 1093.0 1127.0
## - SPECIFIC_GROUP 5 1101.5 1129.5
## - AGE 1 1094.0 1130.0
## - SEX 1 1094.2 1130.2
## <none> 1092.3 1130.3
##
## Step: AIC=1117.77
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
##
## Df Deviance AIC
## - Clinical 2 1098.5 1114.5
## - SPECIFIC_GROUP 5 1106.7 1116.7
## - SEX 1 1099.6 1117.6
## <none> 1097.8 1117.8
## - AGE 1 1102.9 1120.9
##
## Step: AIC=1114.47
## response ~ SPECIFIC_GROUP + SEX + AGE
##
## Df Deviance AIC
## - SEX 1 1100.4 1114.4
## <none> 1098.5 1114.5
## - SPECIFIC_GROUP 5 1109.5 1115.5
## - AGE 1 1103.9 1117.9
##
## Step: AIC=1114.42
## response ~ SPECIFIC_GROUP + AGE
##
## Df Deviance AIC
## <none> 1100.4 1114.4
## - SPECIFIC_GROUP 5 1111.3 1115.3
## - AGE 1 1105.6 1117.6
title <- gsub("response", "Nitr", deparse(best_model$formula))
hnit <- plotROC(df, best_model, title)
hnit
# --- Other
df <- get_data("other", "Nitrofurantoin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=551.53
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital + ORIGIN
##
## Df Deviance AIC
## - ORIGIN 8 525.83 543.83
## - Plant_post_harvest 1 517.69 549.69
## - Plant_pre_harvest 1 518.05 550.05
## - Other 1 518.38 550.38
## - Water 1 519.45 551.45
## <none> 517.53 551.53
## - Soil 1 520.74 552.74
## - Hospital 3 533.06 561.06
##
## Step: AIC=543.83
## response ~ Plant_pre_harvest + Plant_post_harvest + Water + Soil +
## Other + Hospital
##
## Df Deviance AIC
## - Plant_post_harvest 1 525.83 541.83
## - Plant_pre_harvest 1 526.22 542.22
## - Other 1 527.39 543.39
## <none> 525.83 543.83
## - Water 1 528.74 544.74
## - Soil 1 529.61 545.61
## - Hospital 3 544.43 556.43
##
## Step: AIC=541.83
## response ~ Plant_pre_harvest + Water + Soil + Other + Hospital
##
## Df Deviance AIC
## - Plant_pre_harvest 1 527.05 541.05
## <none> 525.83 541.83
## - Other 1 528.93 542.93
## - Soil 1 529.62 543.62
## - Water 1 532.48 546.48
## - Hospital 3 551.98 561.98
##
## Step: AIC=541.05
## response ~ Water + Soil + Other + Hospital
##
## Df Deviance AIC
## <none> 527.05 541.05
## - Soil 1 530.65 542.65
## - Other 1 533.58 545.58
## - Water 1 540.77 552.77
## - Hospital 3 556.79 564.79
title <- gsub("response", "Nitr", deparse(best_model$formula))
onit <- plotROC(df, best_model, title)
onit
# Carbapenem --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Carbapenem")
table(df$response)
##
## 0
## 2488
# --- Human
df <- get_data("human", "Carbapenem")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-AGE_GROUP)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1353.52
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
##
## Df Deviance AIC
## <none> 1333.5 1353.5
## - SEX 1 1340.8 1358.8
## - Clinical 2 1353.3 1369.3
## - AGE 1 1359.7 1377.7
## - SPECIFIC_GROUP 5 1369.9 1379.9
deparse(best_model$formula)
## [1] "response ~ Clinical + SPECIFIC_GROUP + SEX + AGE"
model2 <- glmer(response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + (1|HOSPITAL_WARD),
df, binomial(link = "logit"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge: degenerate Hessian with 1
## negative eigenvalues
# summary(allFit(model2))
title <- gsub("response", "Carb", deparse(best_model$formula))
hcar <- plotROC(df, best_model, title)
hcar
# --- Other
df <- get_data("other", "Carbapenem")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1863 6
# Trimethoprim ------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
full_model <- glm(response ~ ., binomial(link = "logit"), df, maxit = 100)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
best_model <- MASS::stepAIC(full_model)
## Start: AIC=389.51
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 375.71 381.71
## - Livestock 1 374.54 388.54
## - Companion_animal 1 374.64 388.64
## <none> 373.51 389.51
##
## Step: AIC=381.71
## response ~ Livestock + Companion_animal
##
## Df Deviance AIC
## <none> 375.71 381.71
## - Companion_animal 1 390.44 394.44
## - Livestock 1 391.92 395.92
title <- gsub("response", "Tri", deparse(best_model$formula))
atri <- plotROC(df, best_model, title)
atri
# --- Human
df <- get_data("human", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=1048.97
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## - AGE 1 990.98 1047.0
## - SEX 1 991.01 1047.0
## - AGE_GROUP 9 1008.04 1048.0
## - Clinical 2 994.48 1048.5
## <none> 990.97 1049.0
## - SPECIFIC_GROUP 5 1001.29 1049.3
## - PATHOGEN 10 1159.52 1197.5
##
## Step: AIC=1046.98
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE_GROUP
##
## Df Deviance AIC
## - SEX 1 991.02 1045.0
## - AGE_GROUP 9 1008.08 1046.1
## - Clinical 2 994.48 1046.5
## <none> 990.98 1047.0
## - SPECIFIC_GROUP 5 1001.29 1047.3
## - PATHOGEN 10 1159.66 1195.7
##
## Step: AIC=1045.02
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 1008.09 1044.1
## - Clinical 2 994.58 1044.6
## <none> 991.02 1045.0
## - SPECIFIC_GROUP 5 1001.34 1045.3
## - PATHOGEN 10 1159.73 1193.7
##
## Step: AIC=1044.09
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN
##
## Df Deviance AIC
## - Clinical 2 1011.9 1043.9
## <none> 1008.1 1044.1
## - SPECIFIC_GROUP 5 1019.4 1045.4
## - PATHOGEN 10 1176.0 1192.0
##
## Step: AIC=1043.93
## response ~ SPECIFIC_GROUP + PATHOGEN
##
## Df Deviance AIC
## <none> 1011.9 1043.9
## - SPECIFIC_GROUP 5 1027.2 1049.2
## - PATHOGEN 10 1177.6 1189.6
title <- gsub("response", "Tri", deparse(best_model$formula))
htri <- plotROC(df, best_model, title)
htri
# --- Other
df <- get_data("other", "Trimethoprim")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1855 14
# Tetracycline ------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
## df AIC
## model1 6 124.0903
## model2 3 116.2254
title <- gsub("response", "Tetr", deparse(best_model$formula))
atet <- plotROC(df, model2, title)
atet
# --- Human
df <- get_data("human", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=867.33
## response ~ Clinical + SPECIFIC_GROUP + PATHOGEN + SEX + AGE +
## AGE_GROUP
##
## Df Deviance AIC
## <none> 809.33 867.33
## - AGE 1 811.53 867.53
## - SEX 1 812.09 868.09
## - AGE_GROUP 9 836.14 876.14
## - SPECIFIC_GROUP 5 830.34 878.34
## - Clinical 2 830.20 884.20
## - PATHOGEN 10 1031.02 1069.02
title <- gsub("response", "Tetr", deparse(best_model$formula))
htet <- plotROC(df, best_model, title)
htet
# --- Other
df <- get_data("other", "Tetracycline")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1862 7
# Monobactam --------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
best_model <- MASS::stepAIC(full_model)
## Start: AIC=192.26
## response ~ ASSOCIATED_GROUP + Livestock + Companion_animal
##
## Df Deviance AIC
## - ASSOCIATED_GROUP 5 181.75 187.75
## - Livestock 1 176.40 190.40
## - Companion_animal 1 176.91 190.91
## <none> 176.26 192.26
##
## Step: AIC=187.75
## response ~ Livestock + Companion_animal
##
## Df Deviance AIC
## - Livestock 1 182.23 186.23
## <none> 181.75 187.75
## - Companion_animal 1 189.79 193.79
##
## Step: AIC=186.23
## response ~ Companion_animal
##
## Df Deviance AIC
## <none> 182.23 186.23
## - Companion_animal 1 193.39 195.39
title <- gsub("response", "Mono", deparse(best_model$formula))
amon <- plotROC(df, best_model, title)
amon
# --- Human
df <- get_data("human", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=950.74
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE + AGE_GROUP
##
## Df Deviance AIC
## - AGE_GROUP 9 923.73 943.73
## - AGE 1 912.83 948.83
## - SEX 1 914.24 950.24
## <none> 912.74 950.74
## - Clinical 2 919.04 953.04
## - SPECIFIC_GROUP 5 926.15 954.15
##
## Step: AIC=943.73
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
##
## Df Deviance AIC
## - SEX 1 925.38 943.38
## <none> 923.73 943.73
## - Clinical 2 931.48 947.48
## - AGE 1 929.61 947.61
## - SPECIFIC_GROUP 5 938.08 948.08
##
## Step: AIC=943.38
## response ~ Clinical + SPECIFIC_GROUP + AGE
##
## Df Deviance AIC
## <none> 925.38 943.38
## - AGE 1 930.97 946.97
## - Clinical 2 934.02 948.02
## - SPECIFIC_GROUP 5 940.93 948.93
title <- gsub("response", "Mono", deparse(best_model$formula))
hmon <- plotROC(df, best_model, title)
hmon
# --- Other
df <- get_data("other", "Monobactam")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1861 8
# Colistin ----------------------------------------------------------------
# --- Animal
df <- get_data("animal", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
colinear_variables <- tmp[, which(colSums(tmp) != 0), drop = F]
df %<>% select(-PATHOGEN, -Wild_animal)
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
model1 <- glm(response ~ ASSOCIATED_GROUP, binomial(link = "logit"), df)
model2 <- glm(response ~ Livestock + Companion_animal,
binomial(link = "logit"), df)
AIC(model1, model2) # model2 is better
## df AIC
## model1 6 68.31727
## model2 3 61.48973
title <- gsub("response", "Col", deparse(best_model$formula))
acol <- plotROC(df, model2, title)
acol
# --- Human
df <- get_data("human", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tmp <- alias(full_model)$Complete
df %<>% select(-PATHOGEN)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
df %<>% select(-AGE_GROUP)
full_model <- glm(response ~ ., binomial(link = "logit"), df %>%
select(-HOSPITAL_WARD))
best_model <- MASS::stepAIC(full_model)
## Start: AIC=423.37
## response ~ Clinical + SPECIFIC_GROUP + SEX + AGE
##
## Df Deviance AIC
## - SEX 1 403.40 421.40
## - AGE 1 404.33 422.33
## <none> 403.37 423.37
## - SPECIFIC_GROUP 5 414.66 424.66
## - Clinical 2 422.97 438.97
##
## Step: AIC=421.4
## response ~ Clinical + SPECIFIC_GROUP + AGE
##
## Df Deviance AIC
## - AGE 1 404.38 420.38
## <none> 403.40 421.40
## - SPECIFIC_GROUP 5 414.66 422.66
## - Clinical 2 423.26 437.26
##
## Step: AIC=420.38
## response ~ Clinical + SPECIFIC_GROUP
##
## Df Deviance AIC
## <none> 404.38 420.38
## - SPECIFIC_GROUP 5 414.86 420.86
## - Clinical 2 425.34 437.34
title <- gsub("response", "Col", deparse(best_model$formula))
hcol <- plotROC(df, best_model, title)
hcol
# --- Other
df <- get_data("other", "Colistin")
full_model <- glm(response ~ ., binomial(link = "logit"), df)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
table(df$response)
##
## 0 1
## 1868 1
# animals <- cowplot::plot_grid(aam, acep, acol, aflu, afos, amon, anit, apen,
# apenc, app, atet, atri, ats, align = "vh",
# axis = "tb", ncol = 2, labels = letters[1:13])
# ggsave("animals.pdf", animals, width = 12, height = 24)
#
# humans <- cowplot::plot_grid(ham, hcar, hcep, hcol, hflu, hfos, hmon, hnit,
# hpen, hpenc, hpp, htet, htri, hts, align = "vh",
# axis = "tb", ncol = 2, labels = letters[1:14])
# ggsave("humans.pdf", humans, width = 12, height = 24)
#
# others <- cowplot::plot_grid(ocep, ofos, onit, open, openc, opp, align = "vh",
# axis = "tb", ncol = 2, labels = letters[1:6])
# ggsave("others.pdf", others, width = 12, height = 10)